*Write a permanent log file;
PROC PRINTTO LOG="C:\Research\Alcohol and Morbidity\7. Code Archive\Data Files and Code That Produced Them\P06 Inpatient Code up Visits NY.log" NEW;
RUN;

*Write a permanent list file;
PROC PRINTTO PRINT="C:\Research\Alcohol and Morbidity\7. Code Archive\Data Files and Code That Produced Them\P06 Inpatient Code up Visits NY.lst" NEW;
RUN;

/*
Code up NY 1993-2008

Notes 
1. Due suppression of admission data and DOB cant compute exact age for aids admissions - 
   There are only about 250 of these on average among 21-22 year olds per file year except for 2008 when it is several times this
2. There are no separate ecodes prior to 2003, in 2002 and before they are at the end of the ICD-9 list
3. Gender is only missing for 103 observations 
*/

libname NY_INP "D:\Data\Hospital Inpatient Records\Hospital New York (1992-2008)";

*Variables to keep;
%let keep_v1 = AGE asource DX1 PAY1 TOTCHG;

%macro pull_ny(lib,file,keep_v2,keep_v3,keep_v4,keep_v5,keep_v6,keep_v7,ln);
data &file. (drop = &keep_v4.);
   format state $2.;
   set &lib..&file. (keep = &keep_v1. &keep_v2. &keep_v3. &keep_v4. &keep_v5. &keep_v6. &keep_v7.);
  *Record the file name and state postal code; 
   file = "&file.";
   state = substr("&lib.",1,2);
   file_year = substr("&file.",14,4);
  *Compute age in months relative to age 21 - the intck function counts the number of starting 
   points of the interval crossed. So an ED visit occurring in the birth month will be 
   have a months_21 value of 0;
   if substr("&file.",14,4) in (1995,1996,1997) then do;
      months_21 = intck('month',mdy(month(dob),14,year(dob)+21),mdy(month(ADATE),15,year(ADATE)));
	  ad_date = mdy(month(ADATE),15,year(ADATE));;
   end; 
   if substr("&file.",14,4) not in (1995,1996,1997) then do;
      months_21 = intck('month',mdy(bmonth,14,byear+21),mdy(amonth,15,ayear));
	  ad_date = mdy(amonth,15,ayear);
   end;
   if months_21 = . then bad_date = 1; else bad_date = 0;
  *Flag for ED admission on inpatient in some states they switch from one source coding to 
   another in 2007. In 2007 the rates dip in those states suggesting a slippage in coding is occurring;
   from_ED = 0;
   if asource = "1" then from_ED = 1; 
   if &keep_v7. = "7" then from_ED = 1;
  *Code up discharge;
   if &keep_v6. = 20 then died = 1; else died = 0;
  *Flag for pregnant;
   if substr(DX1,1,2) in ('63','64','65','66','67') then preg_related = 1; else preg_related = 0;
  *Restrict based on the age in months variable;
   male = 0;
   if substr("&file.",14,4) <= 1997 and &keep_v5. = 1 then male = 1; 
   if substr("&file.",14,4) > 1997 and &keep_v5. = 0 then male = 1; 
  *In a typical year gender is missing for one observation;
   if substr("&file.",14,4) <= 1997 and &keep_v5. = . then male = .; 
   if substr("&file.",14,4) > 1997 and &keep_v5. = . then male = .; 
   count = 1;
run;
%mend;

%pull_ny(NY_INP,Ny_inpatient_1993,DX1,DX2-DX11,bmonth byear amonth ayear,sex,DISP,ayear,11);
%pull_ny(NY_INP,Ny_inpatient_1994,DX1,DX2-DX17,bmonth byear amonth ayear,sex,DISP,ayear,17);
%pull_ny(NY_INP,Ny_inpatient_1995,DX1,DX2-DX17,DOB ADATE,sex,DISP,DOB,17); 
%pull_ny(NY_INP,Ny_inpatient_1996,DX1,DX2-DX17,DOB ADATE,sex,DISP,DOB,17);
%pull_ny(NY_INP,Ny_inpatient_1997,DX1,DX2-DX17,DOB ADATE,sex,DISP,DOB,17);
%pull_ny(NY_INP,Ny_inpatient_1998,DX1,DX2-DX17,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17);
%pull_ny(NY_INP,Ny_inpatient_1999,DX1,DX2-DX17,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17);
%pull_ny(NY_INP,Ny_inpatient_2000,DX1,DX2-DX17,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17);
%pull_ny(NY_INP,Ny_inpatient_2001,DX1,DX2-DX17,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17);  
%pull_ny(NY_INP,Ny_inpatient_2002,DX1,DX2-DX17,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17); 
%pull_ny(NY_INP,Ny_inpatient_2003,ECODE1-ECODE7,DX2-DX15,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17); 
%pull_ny(NY_INP,Ny_inpatient_2004,ECODE1-ECODE7,DX2-DX15,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17); 
%pull_ny(NY_INP,Ny_inpatient_2005,ECODE1-ECODE6,DX2-DX15,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17); 
%pull_ny(NY_INP,Ny_inpatient_2006,ECODE1-ECODE8,DX2-DX15,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,ayear,17); *Use year as a palce holder here;
%pull_ny(NY_INP,Ny_inpatient_2007,ECODE1-ECODE8,DX2-DX15,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,PointOfOriginUB04,17); 
%pull_ny(NY_INP,Ny_inpatient_2008,ECODE1-ECODE9,DX2-DX15,bmonth byear amonth ayear,FEMALE,DISPUNIFORM,PointOfOriginUB04,17); 

data New_york_inp; 
   format ad_date date.;
   set Ny_inpatient_1993 Ny_inpatient_1994 Ny_inpatient_1995 Ny_inpatient_1996 Ny_inpatient_1997 Ny_inpatient_1998 Ny_inpatient_1999 Ny_inpatient_2000 
       Ny_inpatient_2001 Ny_inpatient_2002 Ny_inpatient_2003 Ny_inpatient_2004 Ny_inpatient_2005 Ny_inpatient_2006 Ny_inpatient_2007 Ny_inpatient_2008;
  *There are 579287 bad dates out of 40448800 records (1.43 percent) with 22 percent of the bad dates having a primary DX of 042 which is HIV
   this leaves 39,869,513 records;         
   if bad_date = 1 then delete; 
  *Restricting to the appropriate age group reduces this to 2,106,255 records;
   if months_21 < -40 or months_21 > 39 then delete;
   *Remove inpatient stays that started before hte time period we are examining reduces it to  2,104,259 records;
	if state = 'NY' and ad_date < mdy(1,1,1993) then delete;
  *Write out e-codes for early years;
   array icd{25} dx1-dx25;
   array e_dx{9} $5. e_dx1-e_dx9;
   array ec{9}  ECODE1-ECODE9;
   i = 1;
   j = 1;
   do until (i > 25);
      if file_year < 2003 and substr(icd{i},1,1) = 'E' then do;
         e_dx{j} = icd{i};
		 j = j + 1;
      end;
      if icd{i} = "" then i = 99;*Exit loop;
      i+ +1; *Increment loop;
   end;
  *Need to do this in two steps otherwise it retains the E code across rows;
   if file_year < 2003 then do;
      do i = 1 to 9;
          ec{i} = e_dx{i};
      end;
   end;
  *Code up alcohol mention;
   alcohol_mention = 0;
   alcohol_mention_oth = 0;
   i = 2; *Start with second ICD code;
   do until (i > 25);
      if substr(icd{i},1,3) in ('291','303') or substr(icd{i},1,4) = '3050' then alcohol_mention = 1; 
	 *These are other minor mentions of alcohol;
      if substr(icd{i},1,4) in ("3575", "4255", "5353", "5710", "5711", "5712", "5713", "7903", "9800", "V113", "V791" )  then alcohol_mention_oth = 1; 
      if icd{i} = '' then i = 99; *Exit loop;
		 i+ +1;
   end;  

	alcohol = 0;
    if substr(DX1,1,3) in ('291','303') or substr(DX1,1,4) = '3050' then alcohol = 1; 
   *Overall injury category for regressions;
	if ECODE1 ne '' then injury = 1; else injury = 0;
   *Source and location of injury;
   inj_by_self_e = 0;
   inj_by_oth_e = 0;
   place = "       ";
  *Code up alcohol or drugs any mention;
   i = 1;
   do until (i > 8);
     *There are very few people that are coded as both (175) and going over all the ecodes only adds a few percent to the number coded; 
      if substr(ec{i},2,2) = '95' then inj_by_self_e = 1; 
      if substr(ec{i},2,2) = '96' then inj_by_oth_e = 1;  
	 *Place often coded for assaults;
      if substr(ec{i},2,3) = '849' then place = ec{i};
      if substr(ec{i},2,4) in ('8600','8601') then alcohol_mention_oth = 1; 
      if ec{i} = '' then i = 99; *Exit loop;
		 i+ +1;
   end;
  *Create mutually exclusive categories precedence - alcohol,;
    if alcohol = 0 and inj_by_oth_e = 1 then inj_by_oth  = 1; else inj_by_oth = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 1 then inj_by_self  = 1; else inj_by_self = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 1 then inj_accident  = 1; else inj_accident = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 1 then alcohol_on_dx = 1; else alcohol_on_dx = 0; *Alcohol not marked as primary cause but mentioned;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 0 then illness = 1; else illness = 0; *Residual category;
  *Create combined categories;
   alcohol_any = alcohol + alcohol_on_dx;
   injury_or_alc =  inj_by_oth + inj_by_self + inj_accident + alcohol + alcohol_on_dx ;
  *Make sure the list is exhaustive;
	visit = alcohol + inj_by_self + inj_by_oth + inj_accident + alcohol_on_dx + illness;
  *Flag for insurance;
   if pay1 = 2	then Medicaid = 1; else Medicaid = 0;
   if pay1 = 3	then Private = 1; else Private = 0;
   if pay1 = 4	then Self_pay = 1; else Self_pay = 0;
   if pay1 not in (2,3,4) then other_ins = 1; else other_ins = 0;
   file_short = substr(file,1,5);
  *Code up female - need to do it this way as had female vs sex in underlying files;
   if male = 1 then female = 0;
   if male = 0 then female = 1;

   if TOTCHG = . then miss_chg = 1; else miss_chg = 0; 
   if female = . then miss_female = 1; else miss_female = 0; 
run;
proc means data = New_york_inp ;
   var  visit alcohol_any injury_or_alc inj_by_self  inj_by_oth inj_accident illness died medicaid private self_pay other_ins  preg_related   female miss_chg miss_female alcohol_mention;
run;

proc datasets;
   delete Ny_inpatient_1993 Ny_inpatient_1994 Ny_inpatient_1995 Ny_inpatient_1996 Ny_inpatient_1997 Ny_inpatient_1998 Ny_inpatient_1999 Ny_inpatient_2000 
       Ny_inpatient_2001 Ny_inpatient_2002 Ny_inpatient_2003 Ny_inpatient_2004 Ny_inpatient_2005 Ny_inpatient_2006 Ny_inpatient_2007 Ny_inpatient_2008;
run;

*Direct list file back to lst window;
PROC PRINTTO PRINT=PRINT;
RUN; 

*Direct log file back to log window;
PROC PRINTTO LOG=LOG;
RUN; 
